Installing Packages

library(pacman)
p_load(arules, arulesViz, ggplot2, tidyrules, dplyr, C50, pander)

Read list of transactions

dataset <- read.transactions("./AssociationRules.csv", sep=" ")

Summary of dataset

Max quantity

freq_tab <- data.frame(itemFrequency(dataset, type="absolute"))
freq_tab <- cbind(rownames(freq_tab), freq_tab)
rownames(freq_tab) <- NULL
names(freq_tab) <- c("item", "freq")
freq_tab[freq_tab$freq == max(freq_tab$freq),]

Make association rules

rules <- apriori(data=dataset, parameter=list(support=0.01, 
                                             confidence=0,
                                             target="rules",
                                             minlen=2))
Apriori

Parameter specification:

Algorithmic control:

Absolute minimum support count: 100 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[98 item(s), 10000 transaction(s)] done [0.01s].
sorting and recoding items ... [89 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 done [0.02s].
writing ... [11435 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s].

Summary rules

summary(rules)
set of 11435 rules

rule length distribution (lhs + rhs):sizes
   2    3    4    5 
2952 7206 1272    5 

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  2.000   2.000   3.000   2.854   3.000   5.000 

summary of quality measures:
    support         confidence         coverage            lift             count     
 Min.   :0.0100   Min.   :0.02183   Min.   :0.01000   Min.   : 0.6717   Min.   : 100  
 1st Qu.:0.0114   1st Qu.:0.16958   1st Qu.:0.03935   1st Qu.: 1.0103   1st Qu.: 114  
 Median :0.0140   Median :0.25000   Median :0.06320   Median : 1.1261   Median : 140  
 Mean   :0.0182   Mean   :0.28012   Mean   :0.08636   Mean   : 1.2302   Mean   : 182  
 3rd Qu.:0.0197   3rd Qu.:0.36070   3rd Qu.:0.10340   3rd Qu.: 1.2828   3rd Qu.: 197  
 Max.   :0.1877   Max.   :1.00000   Max.   :0.49480   Max.   :19.4205   Max.   :1877  

mining info:
length(rules)
[1] 11435
inspect(rules)

More confidence rules

rules.hconf <- subset(rules, confidence >= 0.5)
length(rules.hconf)
[1] 1165
# Some 10 sample
rules.top <- sample(rules, 10)
# High confidence rules
rules.conf <- sort(rules, by="confidence", decreasing=TRUE)
# High lift rules
rules.hlift <- sort(rules, by="lift", decreasing=TRUE)
rules.llift <- sort(rules, by="lift", decreasing=FALSE)
# Top 10 rules by confidence
rules.top_conf <- head(rules.conf, n=10)
# Top 10 rules by lift
rules.top_lift = head(rules.hlift, n=10)

Plot of all finded rules

plot(rules, 
     method="scatterplot", 
     measure=c("support", "confidence"), 
     shading="lift",
     jitter=0.2
)

Plot of hight confidence rules

plot(rules.hconf, 
     method="scatterplot", 
     measure=c("support", "confidence"), 
     shading="lift",
     jitter=0.2
)

Compare support and lift

plot(rules, 
     method="scatterplot", 
     measure=c("support", "lift"), 
     shading="confidence",
     jitter=0.2
)

plot(rules.hconf, 
     method="scatterplot", 
     measure=c("support", "lift"), 
     shading="confidence",
     jitter=0.2
)

head(quality(rules.hconf))
rules.hsup <- subset(rules, support >= 0.1)

plot(rules.hsup, 
     method="scatterplot", 
     measure=c("support", "confidence"), 
     shading="lift",
     engine="htmlwidget"
)
inspect(head(rules.hsup, n=3, by="confidence", decreasong=TRUE))

Point k

inspect(head(rules.hlift, 10))
plot(head(rules.hlift, 10), 
     method="matrix",
     measure="lift",
     control=list(recorder=FALSE),
     engine="htmlwidget")
Unknown control parameters: recorder
Available control parameters (with default values):
interactive  =  TRUE
engine   =  htmlwidget
max  =  1000
colors   =  c("#EE0000FF", "#EEEEEEFF")
reorder  =  measure
precision    =  3
verbose  =  FALSE
inspect(head(rules.llift, 10))
plot(head(rules.llift, 10), 
     method="matrix", 
     measure="lift",
     control=list(recorder=FALSE),
     engine="htmlwidget")
Unknown control parameters: recorder
Available control parameters (with default values):
interactive  =  TRUE
engine   =  htmlwidget
max  =  1000
colors   =  c("#EE0000FF", "#EEEEEEFF")
reorder  =  measure
precision    =  3
verbose  =  FALSE
plot(rules, 
     method="scatterplot", 
     measure=c("support", "confidence"), 
     shading="lift",
     engine="htmlwidget"
)
plot: Too many rules supplied. Only plotting the best 1000 rules using measure lift (change parameter max if needed)To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
plot(rules, 
     method="scatterplot", 
     measure=c("support", "lift"), 
     shading="confidence",
     engine="htmlwidget"
)
plot: Too many rules supplied. Only plotting the best 1000 rules using measure confidence (change parameter max if needed)To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
plot(rules.top_conf, method="paracoord")

plot(rules.top_lift, method="paracoord")

plot(rules.top_lift, method="grouped")

plot(rules.top_conf, method="graph", engine="htmlwidget")
top_3_lift = head(rules.top_lift, n=3)

plot(top_3_lift, method="graph")

rules.hight_trustly <- subset(rules, confidence >= 0.8)

first_38_conf = head(sort(rules.hight_trustly, 
                          by="lift", 
                          decreasing=TRUE), n=40)

plot(rules.hight_trustly, 
     method="matrix", 
     shading=c("lift", "confidence"), 
     measure=c("lift", "confidence"), 
     control=list(reorder=FALSE)
)
Itemsets in Antecedent (LHS)
 [1] "{item55}"               "{item83}"               "{item23}"               "{item10,item44}"       
 [5] "{item20,item23}"        "{item23,item5}"         "{item49,item56}"        "{item15,item49}"       
 [9] "{item82,item99}"        "{item15,item49,item56}" "{item30,item49,item56}" "{item15,item30,item49}"
[13] "{item49,item56,item84}" "{item30,item49,item84}" "{item15,item49,item84}" "{item49,item77,item84}"
[17] "{item5,item82,item99}"  "{item13,item82,item99}" "{item15,item56,item77}" "{item30,item56,item77}"
[21] "{item15,item56,item84}" "{item15,item30,item56}" "{item22,item3,item41}"  "{item10,item22,item41}"
[25] "{item25,item34,item77}" "{item16,item34,item77}" "{item20,item25,item41}" "{item16,item25,item77}"
[29] "{item16,item61,item77}" "{item30,item95,item96}" "{item3,item84,item95}" 
Itemsets in Consequent (RHS)
 [1] "{item34}" "{item13}" "{item15}" "{item56}" "{item84}" "{item30}" "{item5}"  "{item77}" "{item10}"
[10] "{item3}"  "{item92}"

Train model prediction

dataset.train <- dataset[1:8000,]
dataset.test <- dataset[8001:10000,]
model <- apriori(data=dataset.train, parameter=list(support=0.01, 
                                                    confidence=0.1,
                                                    target="rules",
                                                    minlen=2))
Apriori

Parameter specification:

Algorithmic control:

Absolute minimum support count: 80 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[98 item(s), 8000 transaction(s)] done [0.01s].
sorting and recoding items ... [89 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 done [0.02s].
writing ... [10747 rule(s)] done [0.00s].
creating S4 object  ... done [0.01s].
validate_model <- apriori(data=dataset.test, parameter=list(support=0.01,
                                                            confidence=0.1,
                                                            target="rules",
                                                            minlen=2))
Apriori

Parameter specification:

Algorithmic control:

Absolute minimum support count: 20 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[98 item(s), 2000 transaction(s)] done [0.00s].
sorting and recoding items ... [89 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 done [0.01s].
writing ... [12238 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s].
train_df <- DATAFRAME(model)
validate_df <- DATAFRAME(validate_model)

Mean support for trained rules

cat('Support train:\t', mean(train_df$support), '\n')
Support train:   0.01837056 

Mean confidence for trained rules

cat('Confidence train:\t', mean(train_df$confidence), '\n')
Confidence train:    0.2964712 

Mean support for validate rules

cat('Support validate:\t', mean(validate_df$support), '\n')
Support validate:    0.01776226 

Mean confidence for validate rules

cat('Confidence validate:\t', mean(validate_df$confidence), '\n')
Confidence validate:     0.3081568 
LS0tCnRpdGxlOiAiQXNzb2NpYXRpb24gUnVsZXMiCmF1dGhvcjogYWVyb3NoZXYKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQojIyMgSW5zdGFsbGluZyBQYWNrYWdlcyAKYGBge3J9CmxpYnJhcnkocGFjbWFuKQpwX2xvYWQoYXJ1bGVzLCBhcnVsZXNWaXosIGdncGxvdDIsIHRpZHlydWxlcywgZHBseXIsIEM1MCwgcGFuZGVyKQpgYGAKCiMjIyBSZWFkIGxpc3Qgb2YgdHJhbnNhY3Rpb25zCmBgYHtyfQpkYXRhc2V0IDwtIHJlYWQudHJhbnNhY3Rpb25zKCIuL0Fzc29jaWF0aW9uUnVsZXMuY3N2Iiwgc2VwPSIgIikKYGBgCgojIyBTdW1tYXJ5IG9mIGRhdGFzZXQKIyMjIE1heCBxdWFudGl0eQpgYGB7cn0KZnJlcV90YWIgPC0gZGF0YS5mcmFtZShpdGVtRnJlcXVlbmN5KGRhdGFzZXQsIHR5cGU9ImFic29sdXRlIikpCmZyZXFfdGFiIDwtIGNiaW5kKHJvd25hbWVzKGZyZXFfdGFiKSwgZnJlcV90YWIpCnJvd25hbWVzKGZyZXFfdGFiKSA8LSBOVUxMCm5hbWVzKGZyZXFfdGFiKSA8LSBjKCJpdGVtIiwgImZyZXEiKQpmcmVxX3RhYltmcmVxX3RhYiRmcmVxID09IG1heChmcmVxX3RhYiRmcmVxKSxdCmBgYAoKIyMjIE1vc3QgcG9wdWxhciBpdGVtCmBgYHtyfQppdGVtRnJlcXVlbmN5UGxvdChkYXRhc2V0LCB0eXBlPSJhYnNvbHV0ZSIsIHRvcE49MTApIApgYGAKYGBge3J9CnN1bW1hcnkoZGF0YXNldCkKYGBgCgoKIyMgTWFrZSBhc3NvY2lhdGlvbiBydWxlcwpgYGB7cn0KcnVsZXMgPC0gYXByaW9yaShkYXRhPWRhdGFzZXQsIHBhcmFtZXRlcj1saXN0KHN1cHBvcnQ9MC4wMSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbmZpZGVuY2U9MCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGFyZ2V0PSJydWxlcyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1pbmxlbj0yKSkKYGBgCgojIyMgU3VtbWFyeSBydWxlcwpgYGB7cn0Kc3VtbWFyeShydWxlcykKbGVuZ3RoKHJ1bGVzKQpgYGAKYGBge3J9Cmluc3BlY3QocnVsZXMpCmBgYAoKIyMjIE1vcmUgY29uZmlkZW5jZSBydWxlcwpgYGB7cn0KcnVsZXMuaGNvbmYgPC0gc3Vic2V0KHJ1bGVzLCBjb25maWRlbmNlID49IDAuNSkKbGVuZ3RoKHJ1bGVzLmhjb25mKQpgYGAKCgpgYGB7cn0KIyBTb21lIDEwIHNhbXBsZQpydWxlcy50b3AgPC0gc2FtcGxlKHJ1bGVzLCAxMCkKIyBIaWdoIGNvbmZpZGVuY2UgcnVsZXMKcnVsZXMuY29uZiA8LSBzb3J0KHJ1bGVzLCBieT0iY29uZmlkZW5jZSIsIGRlY3JlYXNpbmc9VFJVRSkKIyBIaWdoIGxpZnQgcnVsZXMKcnVsZXMuaGxpZnQgPC0gc29ydChydWxlcywgYnk9ImxpZnQiLCBkZWNyZWFzaW5nPVRSVUUpCnJ1bGVzLmxsaWZ0IDwtIHNvcnQocnVsZXMsIGJ5PSJsaWZ0IiwgZGVjcmVhc2luZz1GQUxTRSkKIyBUb3AgMTAgcnVsZXMgYnkgY29uZmlkZW5jZQpydWxlcy50b3BfY29uZiA8LSBoZWFkKHJ1bGVzLmNvbmYsIG49MTApCiMgVG9wIDEwIHJ1bGVzIGJ5IGxpZnQKcnVsZXMudG9wX2xpZnQgPSBoZWFkKHJ1bGVzLmhsaWZ0LCBuPTEwKQpgYGAKCgojIyMgUGxvdCBvZiBhbGwgZmluZGVkIHJ1bGVzCmBgYHtyfQpwbG90KHJ1bGVzLCAKICAgICBtZXRob2Q9InNjYXR0ZXJwbG90IiwgCiAgICAgbWVhc3VyZT1jKCJzdXBwb3J0IiwgImNvbmZpZGVuY2UiKSwgCiAgICAgc2hhZGluZz0ibGlmdCIsCiAgICAgaml0dGVyPTAuMgopCmBgYAoKIyMjIFBsb3Qgb2YgaGlnaHQgY29uZmlkZW5jZSBydWxlcwpgYGB7cn0KcGxvdChydWxlcy5oY29uZiwgCiAgICAgbWV0aG9kPSJzY2F0dGVycGxvdCIsIAogICAgIG1lYXN1cmU9Yygic3VwcG9ydCIsICJjb25maWRlbmNlIiksIAogICAgIHNoYWRpbmc9ImxpZnQiLAogICAgIGppdHRlcj0wLjIKKQpgYGAKCiMjIyBDb21wYXJlIHN1cHBvcnQgYW5kIGxpZnQKYGBge3J9CnBsb3QocnVsZXMsIAogICAgIG1ldGhvZD0ic2NhdHRlcnBsb3QiLCAKICAgICBtZWFzdXJlPWMoInN1cHBvcnQiLCAibGlmdCIpLCAKICAgICBzaGFkaW5nPSJjb25maWRlbmNlIiwKICAgICBqaXR0ZXI9MC4yCikKYGBgCmBgYHtyfQpwbG90KHJ1bGVzLmhjb25mLCAKICAgICBtZXRob2Q9InNjYXR0ZXJwbG90IiwgCiAgICAgbWVhc3VyZT1jKCJzdXBwb3J0IiwgImxpZnQiKSwgCiAgICAgc2hhZGluZz0iY29uZmlkZW5jZSIsCiAgICAgaml0dGVyPTAuMgopCmBgYApgYGB7cn0KaGVhZChxdWFsaXR5KHJ1bGVzLmhjb25mKSkKYGBgCmBgYHtyfQpydWxlcy5oc3VwIDwtIHN1YnNldChydWxlcywgc3VwcG9ydCA+PSAwLjEpCgpwbG90KHJ1bGVzLmhzdXAsIAogICAgIG1ldGhvZD0ic2NhdHRlcnBsb3QiLCAKICAgICBtZWFzdXJlPWMoInN1cHBvcnQiLCAiY29uZmlkZW5jZSIpLCAKICAgICBzaGFkaW5nPSJsaWZ0IiwKICAgICBlbmdpbmU9Imh0bWx3aWRnZXQiCikKYGBgCgpgYGB7cn0KaW5zcGVjdChoZWFkKHJ1bGVzLmhzdXAsIG49MywgYnk9ImNvbmZpZGVuY2UiLCBkZWNyZWFzb25nPVRSVUUpKQpgYGAKCgojIyMgUG9pbnQgawpgYGB7cn0KaW5zcGVjdChoZWFkKHJ1bGVzLmhsaWZ0LCAxMCkpCmBgYApgYGB7cn0KcGxvdChoZWFkKHJ1bGVzLmhsaWZ0LCAxMCksIAogICAgIG1ldGhvZD0ibWF0cml4IiwKICAgICBtZWFzdXJlPSJsaWZ0IiwKICAgICBjb250cm9sPWxpc3QocmVjb3JkZXI9RkFMU0UpLAogICAgIGVuZ2luZT0iaHRtbHdpZGdldCIpCmBgYAoKYGBge3J9Cmluc3BlY3QoaGVhZChydWxlcy5sbGlmdCwgMTApKQpgYGAKYGBge3J9CnBsb3QoaGVhZChydWxlcy5sbGlmdCwgMTApLCAKICAgICBtZXRob2Q9Im1hdHJpeCIsIAogICAgIG1lYXN1cmU9ImxpZnQiLAogICAgIGNvbnRyb2w9bGlzdChyZWNvcmRlcj1GQUxTRSksCiAgICAgZW5naW5lPSJodG1sd2lkZ2V0IikKYGBgCgoKCmBgYHtyfQpwbG90KHJ1bGVzLCAKICAgICBtZXRob2Q9InNjYXR0ZXJwbG90IiwgCiAgICAgbWVhc3VyZT1jKCJzdXBwb3J0IiwgImNvbmZpZGVuY2UiKSwgCiAgICAgc2hhZGluZz0ibGlmdCIsCiAgICAgZW5naW5lPSJodG1sd2lkZ2V0IgopCmBgYApgYGB7cn0KcGxvdChydWxlcywgCiAgICAgbWV0aG9kPSJzY2F0dGVycGxvdCIsIAogICAgIG1lYXN1cmU9Yygic3VwcG9ydCIsICJsaWZ0IiksIAogICAgIHNoYWRpbmc9ImNvbmZpZGVuY2UiLAogICAgIGVuZ2luZT0iaHRtbHdpZGdldCIKKQpgYGAKCmBgYHtyfQpwbG90KHJ1bGVzLnRvcF9jb25mLCBtZXRob2Q9InBhcmFjb29yZCIpCmBgYAoKYGBge3J9CnBsb3QocnVsZXMudG9wX2xpZnQsIG1ldGhvZD0icGFyYWNvb3JkIikKYGBgCgpgYGB7cn0KcGxvdChydWxlcy50b3BfbGlmdCwgbWV0aG9kPSJncm91cGVkIikKYGBgCgpgYGB7cn0KcGxvdChydWxlcy50b3BfY29uZiwgbWV0aG9kPSJncmFwaCIsIGVuZ2luZT0iaHRtbHdpZGdldCIpCmBgYApgYGB7cn0KdG9wXzNfbGlmdCA9IGhlYWQocnVsZXMudG9wX2xpZnQsIG49MykKCnBsb3QodG9wXzNfbGlmdCwgbWV0aG9kPSJncmFwaCIpCmBgYAoKYGBge3J9CnJ1bGVzLmhpZ2h0X3RydXN0bHkgPC0gc3Vic2V0KHJ1bGVzLCBjb25maWRlbmNlID49IDAuOCkKCnRhaWxfNDBfY29uZiA9IHRhaWwoc29ydChydWxlcy5oaWdodF90cnVzdGx5LCAKICAgICAgICAgICAgICAgICAgICAgICAgICBieT0ibGlmdCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgIGRlY3JlYXNpbmc9VFJVRSksIG49NDApCgpwbG90KHJ1bGVzLmhpZ2h0X3RydXN0bHksIAogICAgIG1ldGhvZD0ibWF0cml4IiwgCiAgICAgc2hhZGluZz1jKCJsaWZ0IiwgImNvbmZpZGVuY2UiKSwgCiAgICAgbWVhc3VyZT1jKCJsaWZ0IiwgImNvbmZpZGVuY2UiKSwgCiAgICAgY29udHJvbD1saXN0KHJlb3JkZXI9RkFMU0UpCikKYGBgCiMjIFRyYWluIG1vZGVsIHByZWRpY3Rpb24KYGBge3J9CmRhdGFzZXQudHJhaW4gPC0gZGF0YXNldFsxOjgwMDAsXQpkYXRhc2V0LnRlc3QgPC0gZGF0YXNldFs4MDAxOjEwMDAwLF0KYGBgCgpgYGB7cn0KbW9kZWwgPC0gYXByaW9yaShkYXRhPWRhdGFzZXQudHJhaW4sIHBhcmFtZXRlcj1saXN0KHN1cHBvcnQ9MC4wMSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb25maWRlbmNlPTAuMSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRhcmdldD0icnVsZXMiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWlubGVuPTIpKQp2YWxpZGF0ZV9tb2RlbCA8LSBhcHJpb3JpKGRhdGE9ZGF0YXNldC50ZXN0LCBwYXJhbWV0ZXI9bGlzdChzdXBwb3J0PTAuMDEsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbmZpZGVuY2U9MC4xLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0YXJnZXQ9InJ1bGVzIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWlubGVuPTIpKQpgYGAKCmBgYHtyfQp0cmFpbl9kZiA8LSBEQVRBRlJBTUUobW9kZWwpCnZhbGlkYXRlX2RmIDwtIERBVEFGUkFNRSh2YWxpZGF0ZV9tb2RlbCkKYGBgCgojIyMgTWVhbiBzdXBwb3J0IGZvciB0cmFpbmVkIHJ1bGVzIApgYGB7cn0KY2F0KCdTdXBwb3J0IHRyYWluOlx0JywgbWVhbih0cmFpbl9kZiRzdXBwb3J0KSwgJ1xuJykKYGBgCgojIyMgTWVhbiBjb25maWRlbmNlIGZvciB0cmFpbmVkIHJ1bGVzIApgYGB7cn0KY2F0KCdDb25maWRlbmNlIHRyYWluOlx0JywgbWVhbih0cmFpbl9kZiRjb25maWRlbmNlKSwgJ1xuJykKYGBgCgojIyMgTWVhbiBzdXBwb3J0IGZvciB2YWxpZGF0ZSBydWxlcyAKYGBge3J9CmNhdCgnU3VwcG9ydCB2YWxpZGF0ZTpcdCcsIG1lYW4odmFsaWRhdGVfZGYkc3VwcG9ydCksICdcbicpCmBgYAoKIyMjIE1lYW4gY29uZmlkZW5jZSBmb3IgdmFsaWRhdGUgcnVsZXMgCmBgYHtyfQpjYXQoJ0NvbmZpZGVuY2UgdmFsaWRhdGU6XHQnLCBtZWFuKHZhbGlkYXRlX2RmJGNvbmZpZGVuY2UpLCAnXG4nKQpgYGAKCmBgYHtyfQoKYGBgCgoK